import requests
from bs4 import BeautifulSoup
from datacollection.lesson8.paiwu.models import *
from sqlalchemy.orm import Session
import time
from sqlalchemy import create_engine


# Database connection string for the target MySQL instance.
# NOTE(review): credentials are hardcoded — move to env vars / config before shipping.
url = 'mysql+mysqlconnector://ly:123456@localhost/test'
engine = create_engine(url)
# NOTE: "sesson" is a typo for "session"; kept as-is because the main block below references it.
sesson = Session(bind=engine)

# Request headers: browser-like User-Agent plus form-encoded content type,
# matching the string body produced by generate_data().
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36',
    'Content-Type': 'application/x-www-form-urlencoded'
}


def generate_data(i, province='320000000000', city='320100000000'):
    """Build the URL-encoded POST body for page *i* of the license listing.

    Generalized: the province/city codes are now parameters, defaulting to
    the original hardcoded values (presumably Jiangsu / Nanjing division
    codes — TODO confirm), so existing callers are unaffected.

    Args:
        i: 1-based page number to request.
        province: administrative division code used for the province filter.
        city: administrative division code used for the city filter.

    Returns:
        The form-encoded request body string expected by the endpoint.
    """
    return (f'page.pageNo={i}&page.orderBy=&page.order='
            f'&province={province}&city={city}'
            f'&registerentername=&xkznum=&treadname=&treadcode=&publishtime=')


if __name__ == '__main__':
    # Scrape pages 1..5 of the license listing and persist each page's rows.
    try:
        for i in range(1, 6):
            print(f'Processing page {i}')
            try:
                # timeout added so a stalled server cannot hang the script forever.
                req = requests.post(
                    'http://permit.mee.gov.cn/perxxgkinfo/syssb/xkgg/xkgg!licenseInformation.action',
                    data=generate_data(i), headers=header, timeout=30)

                soup = BeautifulSoup(req.text, features='lxml')
                # First table with class "tabtd" holds the result grid.
                table = soup.select('table.tabtd')[0]
                rows = []
                # Skip the header row; drop the trailing column (detail link).
                for row in table.select('tr')[1:]:
                    cols = [col.text for col in row.select('td')[:-1]]
                    rows.append(Paiwu(province=cols[0],
                                      city=cols[1],
                                      license=cols[2],
                                      company=cols[3],
                                      companytype=cols[4],
                                      valid=cols[5],
                                      startdate=cols[6]))
                sesson.add_all(rows)
                sesson.commit()
            except Exception as e:
                # BUG FIX: a failed flush/commit leaves the session in an
                # invalid state; roll back so later pages can still be saved.
                sesson.rollback()
                print(e)
            # Be polite to the server between page requests.
            time.sleep(3)
    finally:
        # BUG FIX: always release the DB connection, even if the loop is
        # interrupted (e.g. KeyboardInterrupt) before finishing.
        sesson.close()
